{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 17.004212,
     "end_time": "2021-01-21T13:24:40.500575",
     "exception": false,
     "start_time": "2021-01-21T13:24:23.496363",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%pip install rpy2\n",
    "%load_ext rpy2.ipython\n",
    "# import subprocess\n",
    "# subprocess.run('conda install -c conda-forge r-base', shell=True)\n",
    "# !pip install rpy2\n",
    "import numpy as np\n",
    "import statsmodels.formula.api as smf\n",
    "import rpy2\n",
    "from rpy2.robjects import r, pandas2ri\n",
    "pandas2ri.activate()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%R\n",
    "install.packages(\"dagitty\", quiet=TRUE)\n",
    "library(dagitty, quiet=TRUE)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
     "duration": 0.005613,
     "end_time": "2021-01-21T13:24:23.482240",
     "exception": false,
     "start_time": "2021-01-21T13:24:23.476627",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Collider Bias"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 0.529339,
     "end_time": "2021-01-21T13:24:41.036392",
     "exception": false,
     "start_time": "2021-01-21T13:24:40.507053",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%R\n",
    "g = dagitty(\"dag{ T -> C <- B }\")\n",
    "plot(g)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
    "papermill": {
     "duration": 3.040016,
     "end_time": "2021-01-21T13:24:44.083514",
     "exception": false,
     "start_time": "2021-01-21T13:24:41.043498",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "#collider bias\n",
    "num_samples = 1000000\n",
    "talent = np.random.normal(size=num_samples)\n",
    "beauty = np.random.normal(size=num_samples)\n",
    "congeniality = talent + beauty + np.random.normal(size=num_samples) #congeniality\n",
    "cond_talent = talent[congeniality > 0]\n",
    "cond_beauty = beauty[congeniality > 0]\n",
    "data = {\"talent\": talent, \"beauty\": beauty, \"congeniality\": congeniality, \"cond_talent\": cond_talent, \"cond_beauty\": cond_beauty}\n",
    "\n",
    "print(smf.ols(\"talent ~ beauty\", data).fit().summary())\n",
    "print(smf.ols(\"talent ~ beauty + congeniality\", data).fit().summary())\n",
    "print(smf.ols(\"cond_talent ~ cond_beauty\", data).fit().summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
     "duration": 0.008327,
     "end_time": "2021-01-21T13:24:44.101113",
     "exception": false,
     "start_time": "2021-01-21T13:24:44.092786",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "We can also use package Dagitty to illustrate collider bias, also known as M-bias."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
    "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a",
    "papermill": {
     "duration": 0.09231,
     "end_time": "2021-01-21T13:24:44.201974",
     "exception": false,
     "start_time": "2021-01-21T13:24:44.109664",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%R\n",
    "print(adjustmentSets( g, \"T\", \"B\" ))\n",
    "\n",
    "## empty set -- we should not condition on the additional\n",
    "## variable C.\n",
    "## Generate data where C = .5T + .5B\n",
    "\n",
    "d <- simulateSEM( g, .5 )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = rpy2.robjects.r[\"d\"]\n",
    "print(smf.ols(\"T ~ B\", data).fit().summary()) # includes 0\n",
    "print(smf.ols(\"T ~ B + C\", data).fit().summary()) # does not include 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.7"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 24.263285,
   "end_time": "2021-01-21T13:24:44.323080",
   "environment_variables": {},
   "exception": null,
   "input_path": "__notebook__.ipynb",
   "output_path": "__notebook__.ipynb",
   "parameters": {},
   "start_time": "2021-01-21T13:24:20.059795",
   "version": "2.2.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
